* =============================================================================================
* Date: September 2024
* Paper: Labor Market Participation, Marriage and Individual Welfare
* This program uses the raw PSID data files to prepare a cleaned cross-sectional dataset
* Database used: Raw PSID family files from 2019 
* Output: PSID2019 
* =============================================================================================

* Start from an empty session and raise the variable limit before loading the
* PSID family file.
clear all
set maxvar 32000

* Define the directory of extracted data files from PSID.
* >>> Replace the directory below with your own location. <<<
* The note is kept on its own comment line on purpose: a trailing triple-slash
* is the line-continuation marker in Stata, not a comment, and it would join
* whatever follows on the next line to the global definition.
global path_raw "J:\Staff Areas\ks1779\Research\Projects\Project - Nonparticipation\Application_V2\V5_17092024\PSID\"

* 2019 Family file
use "$path_raw\fam2019er\FAM2019ER.dta", clear

* ER72002 is used as the family identifier in the rest of the script.
* capture hides the error if the rename cannot be done; the later order
* command still needs familyid, so a failure here surfaces there.
capture rename ER72002 familyid



***************** HOUSEHOLD INFORMATION **************************
* --- Household size ----------------------------------------------------------
* ER72016 "# IN FU" NUM(2.0): number of persons in the family unit (FU) at the
*         time of the 2019 interview
* ER72021 "# CHILDREN IN FU": number of persons now in the FU under 18 years of age
gen nhhmem = ER72016
gen nchild = ER72021

* --- Couple indicator --------------------------------------------------------
* ER73422 "G49 WTR SPOUSE IN FU NOW": whether there is a spouse/partner in the FU now
*         1 = spouse/partner in FU now, 5 = all others
* Any other code (or a missing value) leaves couple missing.
gen couple = cond(ER73422 == 1, 1, cond(ER73422 == 5, 0, .))

* --- Sex of the reference person and single-household flags ------------------
* ER72018 "SEX OF REFERENCE PERSON": sex of the 2019 reference person; the code
*         treats 1 as male and 2 as female.
* A logical expression evaluates to 0 or 1, so the two single flags are never
* missing: they are 0 whenever couple is not 0 or the sex code does not match.
gen singlemale   = (couple == 0 & ER72018 == 1)
gen singlefemale = (couple == 0 & ER72018 == 2)

* headmale stays missing when the sex code is neither 1 nor 2.
gen headmale = cond(ER72018 == 1, 1, cond(ER72018 == 2, 0, .))



***************** AGE **************************
*ER72017: "AGE OF REFERENCE PERSON": Age of 2019 Reference Person
*ER72019 "AGE OF SPOUSE": Age of 2019 Spouse/Partner

* Assign ages to the man (agem) and the woman (agef) of each household.
* The reference person (RP, ER72017) supplies the man's age when the RP is male
* (in a couple or single); when the RP is a woman in a couple, the man's age
* comes from the spouse/partner (SP, ER72019). The woman's age mirrors this.
* Only ages between 14 and 120 are accepted; anything else stays missing.
local rp_is_man   "((headmale == 1 & couple == 1) | singlemale == 1)"
local sp_is_man   "(headmale == 0 & couple == 1)"
local rp_is_woman "(singlefemale == 1 | (headmale == 0 & couple == 1))"
local sp_is_woman "(headmale == 1 & couple == 1)"
local rp_age_ok   "inrange(ER72017, 14, 120)"
local sp_age_ok   "inrange(ER72019, 14, 120)"

gen agem = cond(`sp_is_man' & `sp_age_ok', ER72019, ///
           cond(`rp_is_man' & `rp_age_ok', ER72017, .))

gen agef = cond(`rp_is_woman' & `rp_age_ok', ER72017, ///
           cond(`sp_is_woman' & `sp_age_ok', ER72019, .))



**************** WAGE ***************************
* ER77414 "REF PERSON WAGE RATE-2018": Reference Person's 2018 Wage Rate (The values for this variable represent dollars and cents per hour)
* ER77415 "SPOUSE WAGE RATE-2018": Spouse's/Partner's 2018 Wage Rate (The values for this variable represent dollars and cents per hour)

* Hourly wage of the man (wagem) and the woman (wagef).
* ER77414 is the reference person's rate and ER77415 the spouse/partner's rate;
* only values between 0.01 and 998.99 are accepted, everything else is missing.
local rp_is_man   "((headmale == 1 & couple == 1) | singlemale == 1)"
local sp_is_man   "(headmale == 0 & couple == 1)"
local rp_is_woman "(singlefemale == 1 | (headmale == 0 & couple == 1))"
local sp_is_woman "(headmale == 1 & couple == 1)"
local rp_wage_ok  "inrange(ER77414, 0.01, 998.99)"
local sp_wage_ok  "inrange(ER77415, 0.01, 998.99)"

gen wagem = cond(`sp_is_man' & `sp_wage_ok', ER77415, ///
            cond(`rp_is_man' & `rp_wage_ok', ER77414, .))

gen wagef = cond(`rp_is_woman' & `rp_wage_ok', ER77414, ///
            cond(`sp_is_woman' & `sp_wage_ok', ER77415, .))

* Trim wage outliers: trim1 and trim2 keep the wage only when it lies between
* the 1st and 99th percentile. Both sets of percentiles are computed before any
* household is removed.
quietly summarize wagem, detail
gen trim1 = wagem  if r(p1) <= wagem & wagem <= r(p99)

quietly summarize wagef, detail
gen trim2 = wagef  if r(p1) <= wagef & wagef <= r(p99)

* Remove households in which an observed wage fell outside its trimming band.
drop if (!missing(wagem) & missing(trim1)) | (!missing(wagef) & missing(trim2))



**************** OUT OF LABOR FORCE ***************************
* ER72389 "BC7 DAYS OUT OF LABOR FORCE (RP)" The values for this variable represent the actual number of reported days that Reference Person did not have a job and was not looking for one, in 2018.
* ER72391 "BC7 WEEKS OUT OF LABOR FORCE (RP)" The values for this variable represent the actual number of reported weeks that Reference Person did not have a job and was not looking for one, in 2018.
* ER72393 "BC7 MONTHS OUT OF LABOR FORCE (RP)" The values for this variable represent the actual number of reported months that Reference Person did not have a job and was not looking for one.

* ER72666 "DE7 DAYS OUT OF LABOR FORCE (SP)" The values for this variable represent the actual number of reported days that Spouse/Partner did not have a job and was not looking for one, in 2018.
* ER72668 "DE7 WEEKS OUT OF LABOR FORCE (SP)" The values for this variable represent the actual number of reported weeks that Spouse/Partner did not have a job and was not looking for one, in 2018.
* ER72670 "DE7 MONTHS OUT OF LABOR FORCE (SP)" The values for this variable represent the actual number of reported months that Spouse/Partner did not have a job and was not looking for one, in 2018.

* Time spent out of the labor force, reported in days, weeks or months, for the
* man (suffix m) and the woman (suffix f) of each household.
* outlaborm and outlaborf are created first so the variable order is unchanged.
gen outlaborm = .
gen outlaborf = .

* Who supplies the value: reference person (RP) or spouse/partner (SP).
local rp_is_man   "((headmale == 1 & couple == 1) | singlemale == 1)"
local sp_is_man   "(headmale == 0 & couple == 1)"
local rp_is_woman "(singlefemale == 1 | (headmale == 0 & couple == 1))"
local sp_is_woman "(headmale == 1 & couple == 1)"

* Parallel lists: reporting unit, largest accepted value, RP item, SP item.
local unit_names day     week    month
local unit_max   365     52      12
local rp_items   ER72389 ER72391 ER72393
local sp_items   ER72666 ER72668 ER72670

forvalues k = 1/3 {
    local unit : word `k' of `unit_names'
    local top  : word `k' of `unit_max'
    local rp   : word `k' of `rp_items'
    local sp   : word `k' of `sp_items'

    * Values outside 1..top are treated as not reported and stay missing.
    gen `unit'_outlaborm = cond(`sp_is_man' & inrange(`sp', 1, `top'), `sp', ///
                           cond(`rp_is_man' & inrange(`rp', 1, `top'), `rp', .))
    gen `unit'_outlaborf = cond(`sp_is_woman' & inrange(`sp', 1, `top'), `sp', ///
                           cond(`rp_is_woman' & inrange(`rp', 1, `top'), `rp', .))
}

* Flag a person as out of the labor force only when the report covers the
* largest accepted value in any unit (365 days, 52 weeks or 12 months).
replace outlaborm = 1 if day_outlaborm == 365 | week_outlaborm == 52 | month_outlaborm == 12
replace outlaborf = 1 if day_outlaborf == 365 | week_outlaborf == 52 | month_outlaborf == 12
 

 
**************** INVOLUNTARY UNEMPLOYMENT ***************************
* ER72370 "BC8 DAYS UNEMPLOYED" The values for this variable represent the actual number of days Reference Person reported being unemployed, in 2018
* ER72372 "BC8 WEEKS UNEMPLOYED" The values for this variable represent the actual number of weeks Reference Person reported being unemployed, in 2018.
* ER72374 "BC8 MONTHS UNEMPLOYED" The values for this variable represent the actual number of months Reference Person reported being unemployed, in 2018.

* ER72647 "DE8 DAYS UNEMPLOYED" The values for this variable represent the actual number of reported days Spouse/Partner was unemployed, in 2018
* ER72649 "DE8 WEEKS UNEMPLOYED" The values for this variable represent the actual number of reported weeks Spouse/Partner was unemployed, in 2018.
* ER72651 "DE8 MONTHS UNEMPLOYED" The values for this variable represent the actual number of reported months Spouse/Partner was unemployed, in 2018

* Time spent unemployed, reported in days, weeks or months, for the man
* (suffix m) and the woman (suffix f) of each household.
* involuntarym and involuntaryf are created first so the variable order is unchanged.
gen involuntarym = .
gen involuntaryf = .

* Who supplies the value: reference person (RP) or spouse/partner (SP).
local rp_is_man   "((headmale == 1 & couple == 1) | singlemale == 1)"
local sp_is_man   "(headmale == 0 & couple == 1)"
local rp_is_woman "(singlefemale == 1 | (headmale == 0 & couple == 1))"
local sp_is_woman "(headmale == 1 & couple == 1)"

* Parallel lists: reporting unit, largest accepted value, RP item, SP item.
local unit_names day     week    month
local unit_max   365     52      12
local rp_items   ER72370 ER72372 ER72374
local sp_items   ER72647 ER72649 ER72651

forvalues k = 1/3 {
    local unit : word `k' of `unit_names'
    local top  : word `k' of `unit_max'
    local rp   : word `k' of `rp_items'
    local sp   : word `k' of `sp_items'

    * Values outside 1..top are treated as not reported and stay missing.
    gen `unit'_involuntarym = cond(`sp_is_man' & inrange(`sp', 1, `top'), `sp', ///
                              cond(`rp_is_man' & inrange(`rp', 1, `top'), `rp', .))
    gen `unit'_involuntaryf = cond(`sp_is_woman' & inrange(`sp', 1, `top'), `sp', ///
                              cond(`rp_is_woman' & inrange(`rp', 1, `top'), `rp', .))
}

* Flag a person as involuntarily unemployed only when the report covers the
* largest accepted value in any unit (365 days, 52 weeks or 12 months).
replace involuntarym = 1 if day_involuntarym == 365 | week_involuntarym == 52 | month_involuntarym == 12
replace involuntaryf = 1 if day_involuntaryf == 365 | week_involuntaryf == 52 | month_involuntaryf == 12



***************** TIME-USE ***********************
* Roles used throughout this section: which survey respondent, reference person
* (RP) or spouse/partner (SP), describes the man and the woman of the household,
* and which households contain a man or a woman at all.
local m_from_rp "((couple == 1 & headmale == 1) | (singlemale == 1))"
local m_from_sp "(couple == 1 & headmale == 0)"
local f_from_rp "((couple == 1 & headmale == 0) | (singlefemale == 1))"
local f_from_sp "(couple == 1 & headmale == 1)"
local with_m    "(couple == 1 | singlemale == 1)"
local with_f    "(couple == 1 | singlefemale == 1)"

* --- Paid work in a typical week ---------------------------------------------
* ER72408 "BC60A HOURS SPENT WORKING" (RP) and ER72685 "DE60A HOURS SPENT WORKING" (SP):
*   in a typical week, how many hours does the person spend working for pay?
* Only values between 0 and 112 are accepted.
gen laborm = cond(`m_from_sp' & inrange(ER72685, 0, 112), ER72685, ///
             cond(`m_from_rp' & inrange(ER72408, 0, 112), ER72408, .))

gen laborf = cond(`f_from_rp' & inrange(ER72408, 0, 112), ER72408, ///
             cond(`f_from_sp' & inrange(ER72685, 0, 112), ER72685, .))

*----------- COMPARISON WITH LABOR HOURS 2018 -----------------
* ER72172 (RP) and ER72449 (SP): on average, how many hours a week did the
*   person work on all jobs during 2018?
gen laborm2018 = cond(`m_from_sp' & inrange(ER72449, 0, 112), ER72449, ///
                 cond(`m_from_rp' & inrange(ER72172, 0, 112), ER72172, .))

gen laborf2018 = cond(`f_from_sp' & inrange(ER72449, 0, 112), ER72449, ///
                 cond(`f_from_rp' & inrange(ER72172, 0, 112), ER72172, .))

* --- Housework and child care in a typical week ------------------------------
* ER72718 "F1A HOUSEWORK HOURS-REFERENCE PERSON" and ER72731 "F1A HOUSEWORK HRS-SPOUSE":
*   hours per week spent doing housework (cooking, cleaning and other work
*   around the house); accepted range 0 to 112.
* ER72722 "F1D CHILD CARE HRS-REF PERSON" and ER72735 "F1D CHILD CARE HRS-SPOUSE":
*   hours per week spent caring for or looking after children, excluding care
*   provided as a job; accepted range 0 to 168.
* The hours of the absent partner are set to 0 in single households.
local activity hwork   chcare
local rp_items ER72718 ER72722
local sp_items ER72731 ER72735
local item_max 112     168

forvalues k = 1/2 {
    local act : word `k' of `activity'
    local rp  : word `k' of `rp_items'
    local sp  : word `k' of `sp_items'
    local top : word `k' of `item_max'

    gen `act'm = cond(singlefemale == 1, 0, ///
                 cond(`m_from_sp' & inrange(`sp', 0, `top'), `sp', ///
                 cond(`m_from_rp' & inrange(`rp', 0, `top'), `rp', .)))

    gen `act'f = cond(singlemale == 1, 0, ///
                 cond(`f_from_rp' & inrange(`rp', 0, `top'), `rp', ///
                 cond(`f_from_sp' & inrange(`sp', 0, `top'), `sp', .)))
}

* --- Leisure -----------------------------------------------------------------
* Households need complete time-use information for every member present.
drop if missing(laborm, hworkm, chcarem) & `with_m'
drop if missing(laborf, hworkf, chcaref) & `with_f'

* Leisure is what remains of a 112-hour week after paid work, housework and
* child care.
gen leisurem = 112 - laborm - hworkm - chcarem if `with_m'
gen leisuref = 112 - laborf - hworkf - chcaref if `with_f'

* Reported activities may not add up to more than the 112 available hours.
drop if leisurem < 0  & `with_m'
drop if leisuref < 0  & `with_f'

* --- Consistency between wages, hours and labor-force status -----------------
* For each sex, remove households where
*   (a) a wage is observed although the person is flagged as out of the labor
*       force or involuntarily unemployed, or reports zero weekly hours;
*   (b) positive weekly hours are reported but no wage is observed.
foreach s in m f {
    local here "`with_`s''"
    drop if `here' & !missing(wage`s') & (outlabor`s' == 1 | involuntary`s' == 1 | labor`s' == 0)
    drop if `here' & missing(wage`s') & !missing(labor`s') & labor`s' > 0
}



*************************** EXPENDITURE ************************
* ER77513 Total Family Food Expenditure: Generated variable combining expenditures for food at home, delivered, and eaten away from home.
* ER77520 Total Family Housing Expenditure: Generated variable combining expenditures for mortgage and loan payments, rent, property tax, insurance, utilities, cable TV, telephone, internet charges, home repairs, and home furnishings.
* ER77539 Total Family Transportation Expenditure: Generated variable combining expenditures for vehicle loan, lease, and down payments, insurance, other vehicle expenditures, repairs and maintenance, gasoline, parking and car pool, bus fares and train fares, taxicabs and other transportation.
* ER77562 How much in total were these (school-related) expenses ?
* ER77564 How much did you (and your family living there) pay for child care in 2018?
* ER77566 Total Family Health Care Expenditure: Generated variable combining expenditures for hospital and nursing home, doctor, prescription drugs and insurance. 
* ER77575 (Not including any amounts already reported/How) much did you (and your family living there) spend altogether in 2018 on computers, laptops, tablets, cell phones, game players, and software for these devices?
* ER77581 How much did you (and your family living there) spend altogether in 2018 on clothing and apparel, including footwear, outerwear, and products such as watches or jewelry?
* ER77583 How much did you (and your family living there) spend altogether in 2018 on trips and vacations, including transportation, accommodations, and recreational expenses on trips?
* ER77585 How much did you (and your family living there) spend altogether in 2018 on recreation and entertainment, including tickets to movies, sporting events, and performing arts and hobbies including exercise, bicycles, trailers, camping, photography, and reading materials? (Do not include costs associated with the trips and vacations you mentioned previously.)
* ER77587 "TOTAL EXPENDITURE" Total Expenditure: Generated variable combining all expenditures, excluding rent value ER77523
* ER77588 "TOTAL CONSUMPTION WITH RENTAL VALUE": Total Consumption with Rental Value: Generated variable combining all expenditures. Mortgage ER77521 and property tax expenses ER77527 are excluded and replaced with the rent value ER77523.

* Copy the PSID expenditure items into readable names. The two lists are
* parallel: the k-th name is filled from the k-th PSID variable.
local exp_names food    housing transport school  chcare  health  computer clothing trips   recreation
local exp_items ER77513 ER77520 ER77539   ER77562 ER77564 ER77566 ER77575  ER77581  ER77583 ER77585

forvalues k = 1/10 {
    local name : word `k' of `exp_names'
    local item : word `k' of `exp_items'
    gen exp_`name' = `item'
}

* Group the items into two aggregates. rowtotal() counts a missing item as zero.
egen exp_public  = rowtotal(exp_housing exp_transport exp_chcare exp_health exp_trips)
egen exp_private = rowtotal(exp_food exp_school exp_computer exp_clothing exp_recreation)

* Sum of the two aggregates, kept next to the PSID total (ER77587) for comparison.
gen exp_total = exp_public + exp_private
gen exp_total_check = ER77587

* Weekly amounts (division by 52): Q is half of the public aggregate, q is the
* private aggregate plus the other half of the public aggregate.
gen Q = (0.5*exp_public)/52
gen q = (exp_private + 0.5*exp_public)/52

* Non-labor income: weekly consumption (Q + q) minus the weekly earnings
* (wage times hours) of each member present. A member without an observed wage
* contributes zero earnings.
gen nonlabor = Q + q - cond(missing(wagem), 0, wagem*laborm) ///
                     - cond(missing(wagef), 0, wagef*laborf) if couple == 1
replace nonlabor = Q + q - cond(missing(wagem), 0, wagem*laborm) if singlemale == 1
replace nonlabor = Q + q - cond(missing(wagef), 0, wagef*laborf) if singlefemale == 1



*********************************** EDUCATION *********************************
* ER77599 "COMPLETED ED-RP": Reference Person's Completed Education Level
* ER77600 "COMPLETED ED-SP": Spouse's/Partner's Completed Education Level
* (Values in the range 1-16 represent the actual grade of school completed; e.g., a value of 8 indicates that the Head completed the eighth grade. A code value of 17 indicates that the Head completed at least some postgraduate work)  Education is asked only when the FU acquires a new Head. In cases where the Head has remained the same person from the previous 
* interview (ER15890=5), this variable has been carried forward from previous years' data with no updating or other change. Education was asked of all Heads in 1985. This variable differs from the individual level variable ER33516 in the treatment of Heads who received a GED but did not attend college. The family-level variable value for such Heads is the highest grade completed; for the individual-level variable, the value is 12.

* Completed education of the man (edulevelm) and the woman (edulevelf).
* ER77599 is the reference person's level and ER77600 the spouse/partner's
* level; only codes 0 to 17 are accepted, anything else stays missing.
gen edulevelm = ER77599 if inrange(ER77599, 0,17) & ((couple == 1 & headmale == 1) | (singlemale == 1))
replace edulevelm = ER77600 if inrange(ER77600,0,17) & (couple == 1 & headmale == 0)

gen edulevelf = ER77600 if inrange(ER77600,0,17) & (couple == 1 & headmale == 1) 
replace edulevelf = ER77599 if inrange(ER77599, 0,17) & ((couple == 1 & headmale == 0) | (singlefemale == 1))

* Two-level education indicator: 1 = level of 12 or below, 2 = level above 12.
* Stata treats a missing value as larger than any number, so an unguarded
* "level > 12" test would put people with unknown education in group 2.
* The indicator is therefore only defined when the level is observed.
gen edum = cond(edulevelm <= 12, 1, 2) if !missing(edulevelm) & (couple == 1 | singlemale == 1)

gen eduf = cond(edulevelf <= 12, 1, 2) if !missing(edulevelf) & (couple == 1 | singlefemale == 1)



************************* HEIGHT AND WEIGHT **********************************
* ER74577 "H23 HEIGHT-INCHES-RP": How tall (are you/is Reference Person)?--INCHES
* ER75704 "H23 HEIGHT-INCHES--SPOUSE": How tall (are you/is [Spouse/Partner])?--INCHES
* ER74574 "H22 WEIGHT IN POUNDS-RP": About how much (do you/does Reference Person) weigh?--POUNDS
* ER75701 "H22 WEIGHT IN POUNDS--SPOUSE": About how much (do you/does [Spouse/Partner]) weigh?--POUNDS
*
* NOTE(review): the height items are labelled INCHES and only values 1 to 11
* are accepted, so heightm and heightf appear to hold just an inches component
* and a value of 0 is left missing. Confirm whether a separate feet item should
* be combined with it.

* Which respondent describes the man and the woman of the household.
local m_from_rp "((couple == 1 & headmale == 1) | (singlemale == 1))"
local m_from_sp "(couple == 1 & headmale == 0)"
local f_from_rp "((couple == 1 & headmale == 0) | (singlefemale == 1))"
local f_from_sp "(couple == 1 & headmale == 1)"

* Parallel lists: output stem, reference-person item, spouse item, largest
* accepted value (the smallest accepted value is 1 for both measures).
local measures height  weight
local rp_items ER74577 ER74574
local sp_items ER75704 ER75701
local item_max 11      400

forvalues k = 1/2 {
    local what : word `k' of `measures'
    local rp   : word `k' of `rp_items'
    local sp   : word `k' of `sp_items'
    local top  : word `k' of `item_max'

    gen `what'm = cond(`m_from_sp' & inrange(`sp', 1, `top'), `sp', ///
                  cond(`m_from_rp' & inrange(`rp', 1, `top'), `rp', .))

    gen `what'f = cond(`f_from_rp' & inrange(`rp', 1, `top'), `rp', ///
                  cond(`f_from_sp' & inrange(`sp', 1, `top'), `sp', .))
}



************************** REGION AND STATE **************************
*ER77591 "CURRENT REGION": Geographical Region of the 2019 Interview
*States were assigned to regions as follows:
*NORTHEAST: Connecticut, Maine, Massachusetts, New Hampshire, New Jersey, New York, Pennsylvania, Rhode Island, Vermont
*NORTH CENTRAL: Illinois, Indiana, Iowa, Kansas, Michigan, Minnesota, Missouri, Nebraska, North Dakota, Ohio, South Dakota, Wisconsin
*SOUTH: Alabama, Arkansas, Delaware, Florida, Georgia, Kentucky, Louisiana, Maryland, Mississippi, North Carolina, Oklahoma, South Carolina, Tennessee, Texas, Virginia, Washington DC, West Virginia
*WEST: Arizona, California, Colorado, Idaho, Montana, Nevada, New Mexico, Oregon, Utah, Washington, Wyoming.

* Current region of the household: codes 1 to 6 of ER77591, labelled below.
label define region 1 "Northeast" 2 "North Central" 3 "South" 4 "West" 5 "Alaska Hawaii" 6 "Foreign"
gen region = ER77591 if inrange(ER77591, 1, 6)
label values region region

* Which respondent describes the man and the woman of the household.
local m_from_rp "((couple == 1 & headmale == 1) | (singlemale == 1))"
local m_from_sp "(couple == 1 & headmale == 0)"
local f_from_rp "((couple == 1 & headmale == 0) | (singlefemale == 1))"
local f_from_sp "(couple == 1 & headmale == 1)"

* ER77595 "REGION REFERENCE PERSON GREW UP": geographical region in which the reference person grew up
* ER77597 "REGION SP GREW UP": geographical region in which the spouse/partner grew up
* Only codes 1 to 6 are accepted.
gen regiongrewm = cond(`m_from_sp' & inrange(ER77597, 1, 6), ER77597, ///
                  cond(`m_from_rp' & inrange(ER77595, 1, 6), ER77595, .))

gen regiongrewf = cond(`f_from_rp' & inrange(ER77595, 1, 6), ER77595, ///
                  cond(`f_from_sp' & inrange(ER77597, 1, 6), ER77597, .))

* ER76889 "L33 STATE REFERENCE PERSON WAS BORN"
* ER76744 "K33 STATE SPOUSE WAS BORN"
* Only codes 1 to 56 are accepted.
gen statebornm = cond(`m_from_sp' & inrange(ER76744, 1, 56), ER76744, ///
                 cond(`m_from_rp' & inrange(ER76889, 1, 56), ER76889, .))

gen statebornf = cond(`f_from_rp' & inrange(ER76889, 1, 56), ER76889, ///
                 cond(`f_from_sp' & inrange(ER76744, 1, 56), ER76744, .))



********************************** RACE **************************************
*ER76897 "L40 RACE OF REFERENCE PERSON-MENTION 1": What is (your/his/her) race? (Are you/Is [he/she]) white, black, American Indian, Alaska Native, Asian, Native Hawaiian or other Pacific Islander?
*ER76752 "K40 RACE OF SPOUSE-MENTION 1": What is (your/her/his) race? (Are you/Is [he/she]) white, black, American Indian, Alaska Native, Asian, Native Hawaiian or Pacific Islander?

* Race (first mention) of the man (racem) and the woman (racef).
* ER76897 is the reference person's answer and ER76752 the spouse/partner's
* answer. Code 9 is excluded; every other code is copied unchanged.
local m_from_rp "((couple == 1 & headmale == 1) | (singlemale == 1))"
local m_from_sp "(couple == 1 & headmale == 0)"
local f_from_rp "((couple == 1 & headmale == 0) | (singlefemale == 1))"
local f_from_sp "(couple == 1 & headmale == 1)"

gen racem = cond(`m_from_sp' & ER76752 != 9, ER76752, ///
            cond(`m_from_rp' & ER76897 != 9, ER76897, .))

gen racef = cond(`f_from_rp' & ER76897 != 9, ER76897, ///
            cond(`f_from_sp' & ER76752 != 9, ER76752, .))

* One value label shared by both race variables.
label define race 1 "White" 2 "Black" 3 "American Indian" 4 "Asian" 5 "Native Hawaiian" 7 "Other"
foreach v of varlist racem racef {
    label values `v' race
}



******************** SAMPLE SELECTION *******************
* Households that contain a man, and households that contain a woman.
local with_m "(couple == 1 | singlemale == 1)"
local with_f "(couple == 1 | singlefemale == 1)"

* Keep households whose members are aged 25 to 65 with observed education.
* inrange() is false for a missing age, so unknown ages are removed as well.
drop if !inrange(agem, 25, 65) & `with_m'
drop if !inrange(agef, 25, 65) & `with_f'
drop if missing(edulevelm) & `with_m'
drop if missing(edulevelf) & `with_f'

* Employment indicator: 1 when a wage is observed, 0 when no wage is observed
* for a member who is present, missing otherwise.
g male_emp   = cond(!missing(wagem), 1, cond(singlemale == 1 | couple == 1, 0, .))
g female_emp = cond(!missing(wagef), 1, cond(singlefemale == 1 | couple == 1, 0, .))

label define employed 1 "Yes" 0 "No"
foreach v of varlist male_emp female_emp {
    label values `v' employed
}

* Employment category, listed from highest to lowest priority:
*   0 = person not present (single household of the other sex)
*   3 = flagged as involuntarily unemployed
*   2 = flagged as out of the labor force
*   1 = wage observed
g employcatm = cond(singlefemale == 1, 0, ///
               cond(involuntarym == 1, 3, ///
               cond(outlaborm == 1, 2, ///
               cond(!missing(wagem), 1, .))))

g employcatf = cond(singlemale == 1, 0, ///
               cond(involuntaryf == 1, 3, ///
               cond(outlaborf == 1, 2, ///
               cond(!missing(wagef), 1, .))))

* Full-time indicator for employed members with observed hours:
* 1 when weekly hours are 30 or more, 0 when they are below 30.
gen fulltimem = (laborm >= 30) if !missing(laborm) & `with_m' & male_emp == 1
gen fulltimef = (laborf >= 30) if !missing(laborf) & `with_f' & female_emp == 1

* Order the variables from most to least needed, then write the cleaned sample
* as a Stata dataset and as an Excel workbook. Both files are named PSID2019
* and, since no directory is given, go to the current working directory.
* NOTE(review): the summary-statistics part of this script reopens
* PSID2019.dta from an absolute path; make sure it points at the file saved here.
local outvars familyid nhhmem nchild couple singlemale singlefemale region ///
    age* edu* height* weight* regiongrew* stateborn* race* wage* labor* ///
    hwork* chcare* leisure* Q q nonlabor exp* outlabor* involuntary* ///
    employcat* fulltime* male_emp female_emp

order `outvars'
save PSID2019, replace

export excel `outvars' using "PSID2019", nolabel firstrow(variables) replace


*================== CHECK POINT ====================*
* Sanity checks on the final sample, first for men and then for women:
*   - number of households containing the person, and how many have a wage;
*   - employment indicator against employment category and full-time status;
*   - hours and wages of the employed (status 1) and the non-employed (status 0).
foreach sex in male female {
    * One-letter suffix used in the variable names (m or f).
    local s = substr("`sex'", 1, 1)
    local present "(couple == 1 | single`sex' == 1)"

    count if `present'
    count if `present' & !missing(wage`s')

    tab `sex'_emp employcat`s'
    tab `sex'_emp fulltime`s'

    foreach status of numlist 1 0 {
        tabstat labor`s' wage`s' if `sex'_emp == `status' & `present', stat(N min mean max)
    }

}


**********************************************************************************
********************************** SUMMARY STATISTICS ****************************
**********************************************************************************
* Start the summary-statistics part from an empty session.
clear all
set maxvar 32000

* Open the selected sample file.
* >>> Replace the directory below with your own location. <<<
* The note is kept on its own comment line on purpose: a trailing triple-slash
* is the line-continuation marker in Stata, not a comment, and it would join
* whatever follows on the next line to the use command.
use "J:\Staff Areas\ks1779\Research\Projects\Project - Nonparticipation\Application_V2\V5_17092024\PSID\PSID2019.dta"

* Household type: 1 = couple, 2 = single male, 3 = single female.
* It stays missing for households that are none of the three.
g hhtype = 1 if couple == 1
replace hhtype = 2 if singlemale == 1
replace hhtype = 3 if singlefemale == 1
label define hhtype 1 "Couple" 2 "Single male" 3 "Single female"
label values hhtype hhtype 


************ TABLE 1 ****************
* TABLE 1 uses the employment indicators, TABLE 2 the education indicators.
* For each pair: joint distribution of the man's and the woman's variable
* (cell percentages), then each variable on its own among households with
* couple == 0.
foreach pair in "male_emp female_emp" "edum eduf" {
    tabulate `pair', cell
    foreach v of local pair {
        tabulate `v' if couple == 0
    }
}



************ TABLES 3 & 4 ****************
* Person type = 10*(1 + employment indicator) + education indicator:
*   11 = employment no,  education low
*   12 = employment no,  education high
*   21 = employment yes, education low
*   22 = employment yes, education high
gen typem = 10*(1 + male_emp) + edum  
gen typef = 10*(1 + female_emp) + eduf

* Weekly household consumption.
gen consumption = q + Q

* Couples: for each type of the man, count and mean by type of the woman ...
foreach t of numlist 11 12 21 22 {
    tabstat consumption leisurem leisuref if typem == `t' & couple == 1, by(typef) format(%9.2f) stat(N mean)
}

* ... and for each type of the woman, count and mean by type of the man.
foreach t of numlist 11 12 21 22 {
    tabstat consumption leisurem leisuref if typef == `t' & couple == 1, by(typem) format(%9.2f) stat(N mean)
}

* Households with couple == 0, by the man's type and then by the woman's type.
foreach grouping of varlist typem typef {
    tabstat consumption leisurem leisuref if couple == 0, by(`grouping') format(%9.2f) stat(N mean)
}


************ TABLE 5 ****************
* Age category: 1 = 25 to 35, 2 = above 35 up to 50, 3 = above 50.
* Stata treats a missing value as larger than any number, so the last category
* is guarded with !missing(); otherwise an unknown age would be coded as 3.
gen agecatm = 1 if (25 <= agem & agem <= 35) & (couple == 1 | singlemale == 1)
replace agecatm = 2 if (35 < agem & agem <= 50) & (couple == 1 | singlemale == 1)
replace agecatm = 3 if (50 < agem & !missing(agem)) & (couple == 1 | singlemale == 1)

gen agecatf = 1 if (25 <= agef & agef <= 35) & (couple == 1 | singlefemale == 1)
replace agecatf = 2 if (35 < agef & agef <= 50) & (couple == 1 | singlefemale == 1)
replace agecatf = 3 if (50 < agef & !missing(agef)) & (couple == 1 | singlefemale == 1)

* Indicator for at least one child in the family unit.
gen havekid = 0 if nchild == 0 & !missing(nchild)
replace havekid = 1 if nchild > 0 & !missing(nchild)

* Low-education indicators (education indicator equal to 1), defined for the
* members who are present in the household.
g male_lowedu = 1 if edum == 1
replace male_lowedu = 0 if edum == 2 & singlemale == 1
replace male_lowedu = 0 if edum == 2 & couple == 1

g female_lowedu = 1 if eduf == 1
replace female_lowedu = 0 if eduf == 2 & singlefemale == 1
replace female_lowedu = 0 if eduf == 2 & couple == 1

* Sample composition by household type.
tab hhtype

* Means of the indicators by household type.
tabstat male_emp, by(hhtype) format(%9.4f)
tabstat female_emp, by(hhtype) format(%9.4f)
tabstat havekid, by(hhtype) format(%9.4f)
tabstat male_lowedu, by(hhtype) format(%9.4f)
tabstat female_lowedu, by(hhtype) format(%9.4f)
tab agecatm hhtype, col 
tab agecatf hhtype, col 

* Chores = housework plus child care hours.
gen chorehm = hworkm + chcarem
gen chorehf = hworkf + chcaref

* Wages, time use and weekly consumption components by household type.
tabstat wagem wagef laborm laborf chorehm chorehf leisurem leisuref q Q, by(hhtype) format(%9.2f)


************ TABLES 18-22 ****************
* Person-level cell code: 100*education + 10*(1 + havekid) + age category.
* It is set to 0 for the partner who is absent in a single household.
gen typehhm = 100*edum + 10*(1 + havekid) + agecatm if singlefemale == 0
gen typehhf = 100*eduf + 10*(1 + havekid) + agecatf if singlemale == 0
replace typehhm = 0 if singlefemale == 1
replace typehhf = 0 if singlemale == 1

* Household-level code: the man's code in the thousands, the woman's code below.
gen typehh = typehhm*1000 + typehhf

tab typem typef if couple == 1, cell
tab typem if singlemale == 1
tab typef if singlefemale == 1


* Per-capita consumption: couples split household consumption equally.
gen percapita_consumption = consumption if couple == 0
replace percapita_consumption = 0.5*consumption if couple == 1

* Poverty line: 60 percent of the median per-capita consumption.
quietly sum percapita_consumption, de
local povline = 0.6*r(p50)
di `povline'

* poor is 1 below the poverty line and 0 otherwise. It is left missing when
* per-capita consumption is missing, in the same way as the time-poverty
* indicators (poorm_time, poorf_time), instead of being set to 0.
gen poor = (percapita_consumption < `povline') if !missing(percapita_consumption)


************ TABLES 7 & 8 ****************
* TABLES 7 & 8: consumption of singles by own type.
tabstat consumption if singlemale == 1, by(typem) format(%9.2f)
tabstat consumption if singlefemale == 1, by(typef) format(%9.2f)


************ TABLE 9 ****************
* scale = (q + 2*Q) / (q + Q); reported with consumption for couples, for each
* type of the man, by type of the woman.
gen scale = (q + 2*Q)/(q + Q)
foreach t of numlist 11 12 21 22 {
    tabstat consumption scale if couple == 1 & typem == `t', by(typef) format(%9.4f)
}


************ TABLE 10 ****************
* Share of poor by own type: couples and single men by the man's type, then
* couples and single women by the woman's type.
foreach s in m f {
    local single = cond("`s'" == "m", "singlemale", "singlefemale")
    tabstat poor if couple == 1, by(type`s') format(%9.4f)
    tabstat poor if `single' == 1, by(type`s') format(%9.4f)

}


************ TABLE 11 ****************
* Couples: share of poor for each type of the man, by type of the woman.
foreach t of numlist 11 12 21 22 {
    tabstat poor if typem == `t' & couple == 1, by(typef) format(%9.4f)
}


************ TABLE 12 ****************
* Couples: share of poor for each type of the woman, by type of the man.
foreach t of numlist 11 12 21 22 {
    tabstat poor if typef == `t' & couple == 1, by(typem) format(%9.4f)
}


************ TABLE 13 ****************
* Time poverty: a person is time-poor when weekly leisure is below the leisure
* poverty line. The indicators are only defined when leisure is observed.
local povline_leisure = 34.8
gen poorm_time = (leisurem < `povline_leisure') if !missing(leisurem)

gen poorf_time = (leisuref < `povline_leisure') if !missing(leisuref)

* Share of time-poor by own type: couples and single men by the man's type,
* then couples and single women by the woman's type.
foreach s in m f {
    local single = cond("`s'" == "m", "singlemale", "singlefemale")
    tabstat poor`s'_time if couple == 1, by(type`s') format(%9.4f)
    tabstat poor`s'_time if `single' == 1, by(type`s') format(%9.4f)

}


* Count % of involuntary unemployed (relative to all individuals)
* Number of individuals: two per couple plus one per single household.
foreach grp in couple singlemale singlefemale {
    quietly count if `grp' == 1
    local n`grp' = r(N)
}
local nindividuals = 2*`ncouple' + `nsinglemale' + `nsinglefemale'
display `nindividuals'


* Involuntarily unemployed: employment category 3, not employed, and present
* in the household (as part of a couple or as a single of that sex).
quietly count if employcatm == 3 & male_emp == 0 & (couple == 1 | singlemale == 1)
local involuntary_male_count = r(N)
display `involuntary_male_count'
quietly count if employcatf == 3 & female_emp == 0 & (couple == 1 | singlefemale == 1)
local involuntary_female_count = r(N)
display `involuntary_female_count'
display `involuntary_male_count' + `involuntary_female_count'

local involuntary_count = `involuntary_male_count' + `involuntary_female_count'
display `involuntary_count'/`nindividuals'


* Count % of voluntary unemployed (relative to all unemployed)
* The data have not changed since the counts above, so the involuntary counts
* are shown again without being recomputed.
display `involuntary_male_count'
display `involuntary_female_count'
display `involuntary_male_count' + `involuntary_female_count'



* All non-employed members who are present in the household.
quietly count if male_emp == 0 & (couple == 1 | singlemale == 1)
local unemployed_male_count = r(N)
display `unemployed_male_count'
quietly count if female_emp == 0 & (couple == 1 | singlefemale == 1)
local unemployed_female_count = r(N)
display `unemployed_female_count'
display `unemployed_male_count' + `unemployed_female_count'

* Voluntary share = 1 minus the involuntary share among the non-employed.
display 1 - (`involuntary_male_count' + `involuntary_female_count')/(`unemployed_male_count' + `unemployed_female_count')

